## ********************************
##          C2.1 City Analysis       
##        Hao Zhang & Ye Zhang
##             2025/7/24
## ********************************

library(readstata13)
library(lfe)
library(stargazer)
library(tidyverse)
library(doBy)
library(data.table)
library(DescTools)

# set working directory
setwd("../raw data")

# read in data in the CIES folder
options(scipen = 100)
firm <- read.dta13("cies 1998-2007.dta") %>% 
  mutate(citycode = as.numeric(substr(as.character(city), 0, 4)))

# create ownership types
firm$ownership <- as.numeric(firm$ownership)
firm <- firm %>% mutate(soe = ifelse(ownership == 110 | ownership == 151, 1, 0),
                        hmt = ifelse(ownership > 199 & ownership < 299, 1, 0),
                        fie = ifelse(ownership > 299, 1, 0), 
                        private = ifelse(soe == 0 & hmt == 0 & fie == 0, 1, 0))

# create insurance dummy
firm <- firm %>% filter(insurance >= 0) %>% 
  mutate(dummy1 = ifelse(insurance == 0, 0, 1))

# create total_insurance and insurance rate
firm_base1 <- firm %>% filter(year >= 2003, year <= 2006) %>% 
  select(citycode, year, frdm, wage) %>% filter(wage > 0) %>%
  mutate(year = year + 1)
firm_base2 <- firm %>% 
  select(citycode, year, frdm, wage) %>% filter(wage > 0) %>%
  mutate(year = year + 1)
firm2004 <- firm %>% filter(year >= 2004) %>%
  filter(insurance >= 0, medical >= 0) %>% 
  mutate(total_insurance = insurance + medical) %>%
  mutate(dummy2 = ifelse(medical == 0, 0, 1))
firm <- left_join(firm, firm_base2, by = c("citycode", "year", "frdm"), relationship = "many-to-many")
firm2004 <- left_join(firm2004, firm_base1, by = c("citycode", "year", "frdm"), relationship = "many-to-many")
firm <- firm %>% mutate(rate = insurance/wage.y)
firm2004 <- firm2004 %>% mutate(rate = medical/wage.y)
firm$rate <- Winsorize(firm$rate, quantile(firm$rate, probs = c(0.05, 0.95), na.rm = TRUE))
firm2004$rate <- Winsorize(firm2004$rate, quantile(firm2004$rate, probs = c(0.05, 0.95), na.rm = TRUE))

# clean city panel and city leadership
city <- read.dta13("city panel 1995-2010.dta")
city <- city %>% filter(substr(as.character(city$citycode), 3, 4) != "01")
city1 <- city %>% select(citycode, year, gdp) %>% mutate(year = year + 1)
city <- left_join(city, city1, by = c("citycode", "year"))
city$gdpgrowth <- city$gdp.x/city$gdp.y - 1
city <- city %>% mutate(province = as.integer(citycode/100)) %>% 
  filter(province != 11, province != 12, province != 31, citycode != 5102) %>%
  group_by(province, year) %>%
  mutate(gdpgrowth_mean = mean(gdpgrowth, na.rm = TRUE)) %>% 
  mutate(year = year + 1) %>% filter(gdpgrowth >= -0.2, gdpgrowth <= 0.6) %>% #hist set gdpgrowth < 3
  mutate(gdp_pressure = gdpgrowth_mean - gdpgrowth) %>%
  rename(gdp = gdp.x)
mayor <- read.dta13("city mayor 1990-2015.dta") %>% 
  select(citycode, year, code, begin, end, educ, ethnicity, gender, birth_year, seq)

# add tax revenue and other city level variables
city1 <- readxl::read_xlsx("city panel 1996-2014.xlsx") # warnings due to conversion from text to number for the year variable for districts and provinces
citycode <- readxl::read_xlsx("citycode.xlsx") %>%
  mutate(citycode = as.integer(citycode/100))
city1 <- left_join(city1, citycode, by = "city", relationship = "many-to-many") %>% distinct() %>%
  filter(year >= 1999 & year <= 2007)
city <- left_join(city, city1, by = c("citycode", "year"))

################################################################################
# test with coverage (robust to province FE)

# run inverse hyperbolic sine
ihs <- function(x) {
  y <- log(x + sqrt(x ^ 2 + 1))
  return(y)
}

# test with city panels - unemployment
noinsurance <- summaryBy(dummy1 ~ citycode + year, data = firm, FUN = mean, na.rm=TRUE)
profit <- summaryBy(profit ~ citycode + year, data = firm, FUN = sum, na.rm=TRUE)
firm$count <- 1
number <- summaryBy(count ~ citycode + year, data = firm, FUN = sum, na.rm=TRUE)
noinsurance <- left_join(noinsurance, profit, by = c("citycode", "year"))
noinsurance <- left_join(noinsurance, city, by = c("citycode", "year"))
noinsurance <- left_join(noinsurance, mayor, by = c("citycode", "year"))
noinsurance <- left_join(noinsurance, number, by = c("citycode", "year"))
noinsurance <- noinsurance %>% filter(citycode != 1100, citycode != 1200, 
                                      citycode != 3100, citycode != 5102)
noinsurance <- noinsurance %>% filter(gdpgrowth <= 0.6 & gdpgrowth >= -0.2) %>% 
  mutate(capacity = (tax_revenue*100000 - profits)/gdp/10, deficit = expenditure - revenue_allcity, unemp_rate = unemployment/population)

# control for institutions and run regressions
noinsurance <- noinsurance %>% mutate(province = as.integer(citycode/100)) %>% 
  filter(year >= 2001) %>%
  mutate(tax = ifelse(province == 33 & year >= 2004 | province == 13 & year >= 2002 | 
                        province == 42 & year >= 2002 | province == 44 & year >= 2002 | 
                        province == 32 & year >= 2005 | province == 53 & year >= 2006, 1, 0))
m1 <- felm(dummy1.mean ~ gdp_pressure + gdpgrowth | citycode + year | 0 | citycode, data = noinsurance)
m2 <- felm(dummy1.mean ~ gdp_pressure + gdpgrowth + log(gdp) + log(gdp/population) + 
             log(FDI + 1) + ihs(deficit/gdp) + tax + ihs(capacity) + begin | 
             citycode + year + educ + ethnicity  + gender  + connection + 
             seq  | 0 | citycode, data = noinsurance)

# test with city panels - medical insurance
noinsurance <- summaryBy(dummy2 ~ citycode + year, data = firm2004, FUN = mean, na.rm=TRUE)
profit <- summaryBy(profit ~ citycode + year, data = firm2004, FUN = sum, na.rm=TRUE)
firm$count <- 1
number <- summaryBy(count ~ citycode + year, data = firm, FUN = sum, na.rm=TRUE)
noinsurance <- left_join(noinsurance, profit, by = c("citycode", "year"))
noinsurance <- left_join(noinsurance, city, by = c("citycode", "year"))
noinsurance <- left_join(noinsurance, mayor, by = c("citycode", "year"))
noinsurance <- left_join(noinsurance, number, by = c("citycode", "year"))
noinsurance <- noinsurance %>% filter(citycode != 1100, citycode != 1200, 
                                      citycode != 3100, citycode != 5102)
noinsurance <- noinsurance %>% filter(gdpgrowth <= 0.6 & gdpgrowth >= -0.2) %>% 
  mutate(capacity = (tax_revenue*100000 - profits)/gdp/10, deficit = expenditure - revenue_allcity, unemp_rate = unemployment/population)

# run regressions
noinsurance <- noinsurance %>% mutate(province = as.integer(citycode/100)) %>% 
  filter(year >= 2001) %>%
  mutate(tax = ifelse(province == 33 & year >= 2004 | province == 13 & year >= 2002 | 
                        province == 42 & year >= 2002 | province == 44 & year >= 2002 | 
                        province == 32 & year >= 2005 | province == 53 & year >= 2006, 1, 0))
m3 <- felm(dummy2.mean ~ gdp_pressure + gdpgrowth | citycode + year | 0 | citycode, data = noinsurance)
m4 <- felm(dummy2.mean ~ gdp_pressure + gdpgrowth + log(gdp) + log(gdp/population) + 
             log(FDI + 1) + ihs(deficit/gdp) + tax + ihs(capacity) + begin
           | citycode + year + educ + ethnicity  + gender + connection + seq | 
             0 | citycode, data = noinsurance)

# output regression table
stargazer(m1, m2, m3, m4, title = "Interjurisdictional Competition and Social Insurances at City Level", 
          dep.var.labels = c("Unemployment", "Medical and Pension"),
          omit.stat = c("rsq", "adj.rsq", "ser"), 
          font.size = "small", 
          add.lines = list(c("Mayor Characteristics", "N", "Y", "N", "Y"), 
                           c("City Fixed Effects", "Y", "Y", "Y", "Y"), 
                           c("Year Fixed Effects", "Y", "Y", "Y", "Y")))
